This file contains R code to model parking meter activity using various time series models:

For every model there are two versions which correspond with two datasets:

setwd("/Users/stephenreagin/Desktop/USD/ADS_506_final_project/final_project/datasets/")

library(forecast)
#install.packages('GGally')
library(fpp3)
library(tidyverse)
library(dplyr)

transactions_df <- read.csv("daily_total_transactions.csv")
transactions_df$date <- as.Date(transactions_df$date)

transactions_tsibble <- as_tsibble(transactions_df, index=date)

#training and validation data splits
train_transactions_tsibble <- transactions_tsibble |> 
  slice(0:(length(transactions_df$transactions)-90))
valid_transactions_tsibble <- transactions_tsibble |> slice(n()-89:0)

plot(x=transactions_tsibble$date, y=transactions_tsibble$transactions,
     xlab="Date",ylab="Daily Transactions", main='Daily Transactions over Time (all days)')

######## remove holidays and Sundays
nonspecial_df <- subset(transactions_tsibble, (isSunday == 0) & (isHoliday == 0))
special_df <- subset(transactions_tsibble, (isSunday == 1) | (isHoliday == 1))

nonspecial_df$X <- 1:length(nonspecial_df$X)
nonspecial_df <- as_tsibble(nonspecial_df, index=date) 

train_nonspecial_df <- nonspecial_df |> slice(0:(length(nonspecial_df$transactions)-90))
valid_nonspecial_df <- nonspecial_df |> slice(n()-89:0)

plot(x=nonspecial_df$date, y=nonspecial_df$transactions, xlab="Date",ylab="Daily Transactions", 
     main='Daily Transactions over Time (no Sundays or holidays)')

Linear Regression Models

Linear Regression - All Days

\(y_t = \beta_{0}+\beta_{1}{trend}+\beta_{2}season+\beta_{3}rain+\beta_{4}Tavg+\beta_{5}Tmin+\epsilon_{t}\)

#fit the model
fit_lm <- train_transactions_tsibble |> model(lm = TSLM(transactions ~ TAVG + 
                PRCP + TMIN + isSunday + isHoliday + trend() + season()))
report(fit_lm)
## Series: transactions 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17515.5   -392.1    484.8   1164.8  15826.5 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.622e+04  7.253e+02  22.360  < 2e-16 ***
## TAVG           6.542e+01  7.058e+01   0.927 0.354191    
## PRCP          -5.896e+01  3.136e+01  -1.880 0.060399 .  
## TMIN           1.437e+02  5.367e+01   2.677 0.007555 ** 
## isSunday      -1.781e+04  3.828e+02 -46.515  < 2e-16 ***
## isHoliday     -1.381e+04  5.190e+02 -26.608  < 2e-16 ***
## trend()       -2.558e+00  3.794e-01  -6.743 2.72e-11 ***
## season()week2  8.892e+02  3.826e+02   2.324 0.020331 *  
## season()week3 -7.695e+02  3.826e+02  -2.011 0.044621 *  
## season()week4         NA         NA      NA       NA    
## season()week5 -1.328e+03  3.853e+02  -3.448 0.000591 ***
## season()week6 -5.042e+02  3.838e+02  -1.314 0.189284    
## season()week7 -8.344e+01  3.833e+02  -0.218 0.827701    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3130 on 928 degrees of freedom
## Multiple R-squared: 0.8244,  Adjusted R-squared: 0.8224
## F-statistic: 396.2 on 11 and 928 DF, p-value: < 2.22e-16
fit_lm |> gg_tsresiduals() + labs("Linear Regression training errors")

#forecast using validation data
fc_lm <- forecast(fit_lm, new_data = valid_transactions_tsibble)

#transactions_tsibble |> autoplot(transactions) + autolayer(fc_lm)
transactions_tsibble |> autoplot(transactions,color='darkgray') + autolayer(fc_lm, level=NULL) +
  labs(title="Linear Regression 90-day forecast (training set 1)", xlab="Date",ylab="Transactions")

sum(fc_lm$.mean) / sum(valid_transactions_tsibble$transactions)
## [1] 0.9852133
accuracy(fit_lm)
## # A tibble: 1 × 10
##   .model .type          ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm     Training 3.30e-13 3110. 1545.   NaN   Inf 0.809 0.621 0.573
accuracy(fc_lm, valid_transactions_tsibble)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm     Test   206. 1670.  858.  39.4  84.9   NaN   NaN 0.144

Linear Regression - No Sundays or Holidays

fit_lm <- nonspecial_df |> model(lm = TSLM(transactions ~ TAVG + PRCP + TMIN + TMAX + 
                                             trend() + season()))

report(fit_lm)
## Series: transactions 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17570.8   -235.7    470.7   1161.4   4354.0 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   16174.4499   722.7406  22.379  < 2e-16 ***
## TAVG            -49.5142   180.0678  -0.275 0.783404    
## PRCP           -150.6594    40.1717  -3.750 0.000189 ***
## TMIN            198.3696    91.9152   2.158 0.031201 *  
## TMAX             77.0887    85.2088   0.905 0.365886    
## trend()          -2.9777     0.3478  -8.561  < 2e-16 ***
## season()week2   763.8270   359.2533   2.126 0.033786 *  
## season()week3  -879.2885   358.4847  -2.453 0.014381 *  
## season()week4         NA         NA      NA       NA    
## season()week5 -1507.8476   367.7587  -4.100 4.54e-05 ***
## season()week6  -554.7763   355.7285  -1.560 0.119248    
## season()week7  -171.1755   355.2703  -0.482 0.630063    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3004 on 828 degrees of freedom
## Multiple R-squared: 0.2163,  Adjusted R-squared: 0.2068
## F-statistic: 22.85 on 10 and 828 DF, p-value: < 2.22e-16
fc_lm <- forecast(fit_lm, new_data = valid_nonspecial_df)

nonspecial_df |> autoplot(transactions, color='darkgray')  + 
  autolayer(fc_lm, level=NULL) +
  labs(title="Linear Regression 90-day forecast (training set 2)", xlab="Date",ylab="Transactions")

sum(fc_lm$.mean) / sum(valid_nonspecial_df$transactions)
## [1] 0.992725
accuracy(fit_lm)
## # A tibble: 1 × 10
##   .model .type          ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm     Training 6.77e-13 2985. 1431.  -Inf   Inf 0.961 0.980 0.786
accuracy(fc_lm, valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm     Test   124.  867.  612. 0.585  3.68   NaN   NaN 0.427

Redefine tsibble object

For the following models, having gaps in the date will create errors. This is especially a problem for the dataset which includes No Sundays and No Holidays, because it has at least one gap every week. Therefore, I am going to create a dummy index and have the models use the dummy index as a proxy for the date.

#need to redefine tsibble objects to key on a dummy index

nonspecial_df <- subset(transactions_tsibble, (isSunday == 0) & (isHoliday == 0))
special_df <- subset(transactions_tsibble, (isSunday == 1) | (isHoliday == 1))

nonspecial_df$X <- 1:length(nonspecial_df$X)
nonspecial_df <- as_tsibble(nonspecial_df, index=X) 

train_nonspecial_df <- nonspecial_df |> slice(0:(length(nonspecial_df$transactions)-90))
valid_nonspecial_df <- nonspecial_df |> slice(n()-89:0)

ARIMA Models

The (P,D,Q) values for the ARIMA models were automatically generated by running ARIMA(season_adjust, stepwise=FALSE, approx=FALSE) and manually coding the results so as to reduce runtime in subsequent notebook renderings.

ARIMA - All Days

fit <- train_transactions_tsibble |> model(ARIMA(transactions ~ pdq(0,0,2) + PDQ(0,1,1)))
report(fit)
## Series: transactions 
## Model: ARIMA(0,0,2)(0,1,1)[7] 
## 
## Coefficients:
##          ma1     ma2     sma1
##       0.3012  0.2211  -0.9206
## s.e.  0.0341  0.0295   0.0355
## 
## sigma^2 estimated as 15390813:  log likelihood=-9049.28
## AIC=18106.56   AICc=18106.6   BIC=18125.91
fit |> gg_tsresiduals() + labs("ARIMA training errors")

fc <- fit |> forecast(h=180) 

transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
  labs(title="ARIMA 180-day forecast (training set 1)", xlab="Date",ylab="Transactions")

sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.050678
accuracy(fit)
## # A tibble: 1 × 10
##   .model                  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 ARIMA(transactions ~ p… Trai…  48.9 3902. 2107.   NaN   Inf  1.10 0.779 0.0345
accuracy(fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(transactions ~ pd… Test  -707. 3039.  981. -149.  153.   NaN   NaN 0.117

ARIMA - No Sundays or holidays

fit <- train_nonspecial_df |> model(ARIMA(transactions ~ pdq(4,1,2)))
report(fit)
## Series: transactions 
## Model: ARIMA(4,1,2) 
## 
## Coefficients:
##          ar1      ar2      ar3      ar4      ma1     ma2
##       0.3780  -0.7615  -0.2269  -0.2526  -0.7925  0.8152
## s.e.  0.0619   0.0548   0.0371   0.0391   0.0518  0.0616
## 
## sigma^2 estimated as 4045882:  log likelihood=-6748.47
## AIC=13510.94   AICc=13511.09   BIC=13543.26
fit |> gg_tsresiduals() + labs("ARIMA training errors")

fc <- fit |> forecast(h=180) 

nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) + 
  labs(title="ARIMA 180-day forecast (training set 2)", xlab="Date",ylab="Transactions")

sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.25884
accuracy(fit)
## # A tibble: 1 × 10
##   .model                 .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ARIMA(transactions ~ … Trai…  15.8 2002.  984.   NaN   Inf 0.890 0.895 -0.0120
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(transactions ~ pd… Test  -450. 1157.  887. -3.05  5.41   NaN   NaN 0.438

ETS

ETS - All Days

ets_fit <- train_transactions_tsibble |> model(ETS(transactions))
report(ets_fit)
## Series: transactions 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.193608 
##     gamma = 0.000100066 
## 
##   Initial states:
##      l[0]     s[0]    s[-1]    s[-2]    s[-3]   s[-4]    s[-5]     s[-6]
##  13192.06 2139.278 3599.093 2860.265 3203.008 2823.13 222.9795 -14847.75
## 
##   sigma^2:  14130134
## 
##      AIC     AICc      BIC 
## 21922.07 21922.31 21970.53
ets_fit |> gg_tsresiduals() + labs("ETS training errors")

ets_fit |> components() |> autoplot()

ets_fc <- ets_fit |> forecast(h=180)
fitted_ets <- ets_fit |> augment()

#training
accuracy(fitted_ets$.fitted, train_transactions_tsibble$transactions)
##                ME     RMSE      MAE MPE MAPE
## Test set 12.78305 3740.969 1909.357 NaN  Inf
#validation
accuracy(ets_fc$.mean[1:90], valid_transactions_tsibble$transactions)
##                 ME     RMSE     MAE       MPE     MAPE
## Test set -1569.208 3338.123 1624.55 -186.2008 186.5356
ets_fc |> autoplot(transactions_tsibble, level=NULL, color='red') + 
  ggtitle("ETS Model (ANA) 180-day forecast (training set 1)") +
  geom_line(data=ets_fit |> augment(), aes(y=.fitted), color='blue', lty=2)

sum(ets_fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.112513
accuracy(ets_fit)
## # A tibble: 1 × 10
##   .model            .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>             <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 ETS(transactions) Training  12.8 3741. 1909.   NaN   Inf 0.999 0.746 0.0929
accuracy(ets_fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
##   .model            .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>             <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(transactions) Test  -1569. 3338. 1625. -186.  187.   NaN   NaN 0.120

ETS - no Sundays or Holidays

ets_fit <- train_nonspecial_df |> model(ETS(transactions))
report(ets_fit)
## Series: transactions 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.4849877 
## 
##   Initial states:
##      l[0]
##  11301.53
## 
##   sigma^2:  4285722
## 
##      AIC     AICc      BIC 
## 16399.26 16399.29 16413.12
ets_fit |> gg_tsresiduals() + labs("ETS training errors")

ets_fit |> components() |> autoplot()

ets_fc <- ets_fit |> forecast(h=180)
fitted_ets <- ets_fit |> augment()

ets_fc |> autoplot(nonspecial_df, level=NULL, color='red') + 
  ggtitle("ETS Model (ANN) 180-day forecast (training set 2") +
  geom_line(data=ets_fit |> augment(), aes(y=.fitted), color='blue', lty=2)

sum(ets_fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.255849
accuracy(ets_fit)
## # A tibble: 1 × 10
##   .model            .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>             <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 ETS(transactions) Training  17.1 2067. 1133.  -Inf   Inf  1.02 0.925 0.0941
accuracy(ets_fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model            .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>             <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(transactions) Test  -408. 1152.  879. -2.82  5.36   NaN   NaN 0.441

STL + ETS

Decomposition of the All Days dataset

transactions_tsibble %>% model(STL(transactions ~ season(7) +
                                     season(365), robust=TRUE)) %>% components() |>  autoplot()

Decomposition of the No Sundays, No Holidays dataset

The new weekly season is 6 days, and the annual season is 294 (365 - 52 Sundays - 19 holidays = 294)

nonspecial_df %>% model(STL(transactions ~ season(6) +
                                     season(294), robust=TRUE)) %>% components() |>  autoplot()

STL + ETS - All Days

dcmp_spec <- decomposition_model(STL(transactions ~ season(7) + season(365), robust=TRUE),
                    ETS(season_adjust))

dcmp_ets <- train_transactions_tsibble |> model(dcmp_spec) 
report(dcmp_ets)
## Series: transactions 
## Model: STL decomposition model 
## Combination: season_adjust + season_7 + season_365
## 
## ==================================================
## 
## Series: season_adjust + season_7 
## Model: COMBINATION 
## Combination: season_adjust + season_7
## 
## =====================================
## 
## Series: season_adjust 
## Model: ETS(A,Ad,A) 
##   Smoothing parameters:
##     alpha = 0.06896141 
##     beta  = 0.0001000607 
##     gamma = 0.0001000321 
##     phi   = 0.94292 
## 
##   Initial states:
##      l[0]     b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]     s[-5]
##  9509.027 390.1202 -182.1924 -148.0245 -89.21192 62.65256 290.1151 -373.4501
##     s[-6]
##  440.1113
## 
##   sigma^2:  3938723
## 
##      AIC     AICc      BIC 
## 20724.24 20724.63 20787.23 
## 
## Series: season_7 
## Model: SNAIVE 
## 
## sigma^2: 2169.2607 
## 
## 
## Series: season_365 
## Model: SNAIVE 
## 
## sigma^2: 14877112.024
dcmp_ets |> gg_tsresiduals() + labs("STL + ETS training errors")

fc <-  dcmp_ets |> forecast(h=180)


transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
  labs(title="STL + ETS(A,Ad,A) 180-day forecast (training set 1)", xlab="Date",ylab="Transactions")

sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions[1:90])
## [1] 0.9815249
accuracy(dcmp_ets)
## # A tibble: 1 × 10
##   .model    .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -104. 4682. 1995.   NaN   Inf  1.04 0.934 0.223
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model    .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Test  3418. 7518. 4072.  19.6  23.8  4.13  5.92 0.974

STL + ETS - No Sundays or Holidays

dcmp_spec <- decomposition_model(STL(transactions ~ season(6) + season(294), robust=TRUE),
                                 ETS(season_adjust))

dcmp_ets <- train_nonspecial_df |> model(dcmp_spec) 
report(dcmp_ets)
## Series: transactions 
## Model: STL decomposition model 
## Combination: season_adjust + season_6 + season_294
## 
## ==================================================
## 
## Series: season_adjust + season_6 
## Model: COMBINATION 
## Combination: season_adjust + season_6
## 
## =====================================
## 
## Series: season_adjust 
## Model: ETS(A,Ad,N) 
##   Smoothing parameters:
##     alpha = 0.1015276 
##     beta  = 0.0001000067 
##     phi   = 0.9715424 
## 
##   Initial states:
##     l[0]     b[0]
##  15132.8 123.2774
## 
##   sigma^2:  1310069
## 
##      AIC     AICc      BIC 
## 15514.53 15514.64 15542.24 
## 
## Series: season_6 
## Model: SNAIVE 
## 
## sigma^2: 14929.3458 
## 
## 
## Series: season_294 
## Model: SNAIVE 
## 
## sigma^2: 9131833.0067
dcmp_ets |> gg_tsresiduals() + labs("STL + ETS training errors")

fc <-  dcmp_ets |> forecast(h=180)

nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
  labs(title="STL + ETS(A,Ad,N) 180-day forecast (training set 2", xlab="Date",ylab="Transactions")

sum(fc$.mean[1:90]) / sum(valid_nonspecial_df$transactions[1:90])
## [1] 0.9717736
accuracy(dcmp_ets)
## # A tibble: 1 × 10
##   .model    .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -125. 3401. 1458.  -Inf   Inf  1.32  1.52 0.715
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model    .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Test   483. 1511. 1278.  2.44  7.50   NaN   NaN 0.257

STL + ARIMA

STL + ARIMA - All Days

dcmp_spec <- decomposition_model(STL(transactions ~ season(7) + season(365), robust=TRUE),
                                 ARIMA(season_adjust, stepwise=FALSE, approx=FALSE))

dcmp_arima <- train_transactions_tsibble |> model(dcmp_spec) 
report(dcmp_arima)
## Series: transactions 
## Model: STL decomposition model 
## Combination: season_adjust + season_7 + season_365
## 
## ==================================================
## 
## Series: season_adjust + season_7 
## Model: COMBINATION 
## Combination: season_adjust + season_7
## 
## =====================================
## 
## Series: season_adjust 
## Model: ARIMA(2,1,3)(0,0,1)[7] 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2      ma3    sma1
##       1.4282  -0.7676  -2.2692  2.0326  -0.7494  0.2246
## s.e.  0.0719   0.0579   0.0637  0.0962   0.0415  0.0392
## 
## sigma^2 estimated as 3742733:  log likelihood=-8436.57
## AIC=16887.15   AICc=16887.27   BIC=16921.06
## 
## Series: season_7 
## Model: SNAIVE 
## 
## sigma^2: 2169.2607 
## 
## 
## Series: season_365 
## Model: SNAIVE 
## 
## sigma^2: 14877112.024
dcmp_arima |> gg_tsresiduals() + labs("STL + ARIMA training errors")

fc <-  dcmp_arima |> forecast(h=180)

transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) + 
  labs(title="STL + ARIMA 180-day forecast (training set 1)", xlab="Date",ylab="Transactions")

sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 0.9818345
accuracy(dcmp_arima)
## # A tibble: 1 × 10
##   .model    .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -128. 4646. 1979.   NaN   Inf  1.04 0.927 0.170
accuracy(fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
##   .model    .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 dcmp_spec Test   253. 4204. 1641. -78.5  221.   NaN   NaN -0.429

STL + ARIMA - No Sundays or Holidays

dcmp_spec <- decomposition_model(STL(transactions ~ season(6) + season(294), robust=TRUE),
                                 ARIMA(season_adjust, stepwise=FALSE, approx=FALSE))

dcmp_arima <- train_nonspecial_df |> model(dcmp_spec) 
report(dcmp_arima)
## Series: transactions 
## Model: STL decomposition model 
## Combination: season_adjust + season_6 + season_294
## 
## ==================================================
## 
## Series: season_adjust + season_6 
## Model: COMBINATION 
## Combination: season_adjust + season_6
## 
## =====================================
## 
## Series: season_adjust 
## Model: ARIMA(2,1,4) 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2      ma3      ma4
##       -0.8860  -0.9418  0.2706  0.1389  -0.8081  -0.2986
## s.e.   0.0463   0.0310  0.0539  0.0410   0.0310   0.0352
## 
## sigma^2 estimated as 1226299:  log likelihood=-6302.51
## AIC=12619.02   AICc=12619.17   BIC=12651.34
## 
## Series: season_6 
## Model: SNAIVE 
## 
## sigma^2: 14929.3458 
## 
## 
## Series: season_294 
## Model: SNAIVE 
## 
## sigma^2: 9131833.0067
dcmp_arima |> gg_tsresiduals() + labs("STL + ARIMA training errors")

fc <-  dcmp_arima |> forecast(h=180)

nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
  labs(title="STL + ARIMA 180-day forecast (training set 2)", xlab="Date",ylab="Transactions")

sum(fc$.mean[1:90]) / sum(valid_nonspecial_df$transactions)
## [1] 0.9717556
accuracy(dcmp_arima)
## # A tibble: 1 × 10
##   .model    .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -122. 3351. 1440.  -Inf   Inf  1.30  1.50 0.690
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model    .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Test   483. 1511. 1274.  2.44  7.48   NaN   NaN 0.256

Establishing a baseline with Mean, Naive, Drift models

Baseline - All Days

fit <- train_transactions_tsibble |> model(
  mean = MEAN(transactions),
  naive = NAIVE(transactions),
  drift = RW(transactions ~ drift()))

fc <- fit |> forecast(h=180)
transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
  labs(title="Baseline 180-day forecasts (training set 1)", xlab="Date",ylab="Transactions")

accuracy(fit)
## # A tibble: 3 × 10
##   .model .type           ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 mean   Training -3.85e-12 7423. 5946.  -Inf   Inf  3.11  1.48  0.161
## 2 naive  Training  1.76e+ 1 9608. 5898.  -Inf   Inf  3.09  1.92 -0.377
## 3 drift  Training  8.33e-13 9608. 5894.  -Inf   Inf  3.09  1.92 -0.377
accuracy(fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 mean   Test  -809. 6427. 4338. -1036. 1057.   NaN   NaN -0.0134
sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.057975
accuracy(fc[181:270,], valid_transactions_tsibble)
## # A tibble: 1 × 10
##   .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 naive  Test  -2816. 6970. 3580. -1191. 1195.   NaN   NaN -0.0134
sum(fc$.mean[181:270]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.201911
accuracy(fc[361:450,], valid_transactions_tsibble)
## # A tibble: 1 × 10
##   .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 drift  Test  -3618. 7386. 3941. -1259. 1261.   NaN   NaN 0.00278
sum(fc$.mean[361:450]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.259421

Baseline - No Sundays or Holidays

fit <- train_nonspecial_df |> model(
  mean = MEAN(transactions),
  naive = NAIVE(transactions),
  drift = RW(transactions ~ drift())) 

fc <- fit |> forecast(h=180)
nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) + 
  labs(title="Baseline 180-day forecasts (training set 2)", xlab="Date",ylab="Transactions")

accuracy(fit)
## # A tibble: 3 × 10
##   .model .type           ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 mean   Training -5.70e-12 3539. 1803.  -Inf   Inf 1.63   1.58  0.798
## 2 naive  Training  8.89e+ 0 2236. 1106.  -Inf   Inf 1      1    -0.288
## 3 drift  Training  4.67e-13 2236. 1104.  -Inf   Inf 0.998  1.00 -0.288
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mean   Test  -791. 1336. 1051. -5.06  6.45   NaN   NaN 0.441
sum(fc$.mean[1:90]) / sum(valid_nonspecial_df$transactions)
## [1] 1.046237
accuracy(fc[181:270,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 naive  Test  -547. 1208.  937. -3.63  5.73   NaN   NaN 0.441
sum(fc$.mean[181:270]) / sum(valid_nonspecial_df$transactions)
## [1] 1.03197
accuracy(fc[361:450,], valid_nonspecial_df)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift  Test  -951. 1530. 1217. -6.04  7.46   NaN   NaN 0.537
sum(fc$.mean[361:450]) / sum(valid_nonspecial_df$transactions)
## [1] 1.055602

Additional visuals

Weekly Transactions, Holidays, Precipitation

library(xts)

num_transactions.ts <- ts(transactions_df$transactions, start=c(2020,295), freq=365)

#creating time series vectors
total_transactions.xts <- xts(transactions_df$transactions, order.by = transactions_df$date)
isHoliday.xts <- xts(transactions_df$isHoliday, order.by = transactions_df$date)
precip.xts <- xts(transactions_df$PRCP, order.by = transactions_df$date)
tavg.xts <- xts(transactions_df$TAVG, order.by = transactions_df$date)


weekly_transactions.xts <- apply.weekly(total_transactions.xts, sum)
weekly_precip.xts <- apply.weekly(precip.xts, sum)
weekly_tavg.xts <- apply.weekly(tavg.xts, mean)

#weekly transactions with holidays
plot.xts(weekly_transactions.xts, main='Weekly Transactions over Time',ylim=c(0,150000)) 

lines(weekly_precip.xts * 1000 + 20000, col='blue', lty=2, lwd=1) 

points(isHoliday.xts * 100000-1, pch = 20, col='purple') 

addLegend("topleft",c('Weekly transactions', "Rainfall",'Holiday'), lty=c(1,2,3),
          col=c('black','blue','purple'), lwd=c(1,1,2))

Relationship with Average Temperature

transactions_tsibble |> ggplot(aes(x=TAVG, y=transactions)) + geom_point() + 
  geom_smooth(method = 'lm', se=FALSE) + labs(title="Transactions as a function of Temperature (C)")

nonspecial_df |> ggplot(aes(x=TAVG, y=transactions)) + geom_point() + 
  geom_smooth(method = 'lm', se=FALSE) + labs(title = "Transactions as a Function of Temperature (Sundays & Holidays removed)")

ACF and Difference plots

ACF - All Days

train_transactions_tsibble |> ACF(difference(transactions), lag_max = 180) |> autoplot() +
  labs(title = "ACF for Training Set 1")

train_transactions_tsibble |> autoplot(difference(transactions, lag=7)) +
  labs(title = "Training Set 1, lag-7 difference")

ACF - No Sundays, No Holidays

train_nonspecial_df |> ACF(difference(transactions), lag_max = 180) |> autoplot() +
  labs(title = "ACF for Training Set 2")

train_nonspecial_df |> autoplot(difference(transactions, lag=1)) + 
  labs(title = "Training Set 1, lag-7 difference")